//
// Project: Disagreement in science: Missing women



// start from a clean session
// note: the globals ${data} and ${output} used for file paths below are not
// defined in this file and must already be set when it is run
clear all
// run the commands below under Stata 15.1 version control
version 15.1  



//
// set locals

// method of identifying gender
// NOTE(review): `female', `male' and `known_gender' are not referenced anywhere
// in the visible part of this file -- confirm whether they are still needed
local female "female_genderize"
local male "male_genderize"

// gender of author is known
local known_gender "female_genderize!=."




//
// AER
//
// Distribution of the number of authors per article, comments vs. regular
// research articles. Prints a cross-tabulation and exports the figure as
// number_authors_aer.eps / .png.

// load data and restrict the sample
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
drop if year==2020
keep if (comment | research_article)

// number of authors = number of rows sharing the same article_id
bysort article_id: egen number_authors = count(article_id)
replace number_authors = 4 if number_authors > 4 & !missing(number_authors)  // 4+ authors into single bin

// reduce to one row per article
collapse (firstnm) number_authors year comment research_article, by(article_id)

// cross-tabulation with column percentages
tabulate number_authors comment, column

// overlaid histograms: comments (filled grey bars) vs. regular articles (outlined bars)
// -fraction- makes the bar heights shares of articles within each group, as the
// y-axis title states; the default (density) equals the share only when the bin
// width is 1, and with -discrete- the width is taken from the observed values
twoway ///
    (histogram number_authors if comment==1, discrete fraction barwidth(0.5) ///
        lcolor(none) fcolor(gs11%80)) ///
    (histogram number_authors if comment==0, discrete fraction barwidth(0.5) ///
        lcolor(gs3) fcolor(none) lwidth(medthick)), ///
    plotregion(style(none)) scheme(s1color) title("") ///
    xtitle("Number of authors") ///
    ytitle("Share of articles with given number of authors") ///
    xlabel(0.5 `" "' 1 `"1"' 2 `"2"' 3 `"3"' 4 `"4+"' 4.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .20 `"0.20"' .40 `"0.40"' .60 `"0.60"' .80 `"0.80"' 1 `"1"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., 1)) ///
    legend(row(1) size(medsmall) order(2 1) label(2 "regular article") label(1 "comment") region(lwidth(none)))

// save the figure in both formats
graph export "${output}/number_authors_aer.eps", replace
graph export "${output}/number_authors_aer.png", replace



//
// ASR
//
// Distribution of the number of authors per article, comments vs. regular
// research articles. Prints a cross-tabulation and exports the figure as
// number_authors_asr.eps / .png.

// load data and restrict the sample
use "${data}/output/asr_data_gender.dta", clear
keep if (comment | research_article)

// number of authors = number of rows sharing the same article_id
bysort article_id: egen number_authors = count(article_id)
replace number_authors = 4 if number_authors > 4 & !missing(number_authors)  // 4+ authors into single bin

// reduce to one row per article
collapse (firstnm) number_authors year comment research_article, by(article_id)

// cross-tabulation with column percentages
tabulate number_authors comment, column

// overlaid histograms: comments (filled grey bars) vs. regular articles (outlined bars)
// -fraction- makes the bar heights shares of articles within each group, as the
// y-axis title states; the default (density) equals the share only when the bin
// width is 1, and with -discrete- the width is taken from the observed values
twoway ///
    (histogram number_authors if comment==1, discrete fraction barwidth(0.5) ///
        lcolor(none) fcolor(gs11%80)) ///
    (histogram number_authors if comment==0, discrete fraction barwidth(0.5) ///
        lcolor(gs3) fcolor(none) lwidth(medthick)), ///
    plotregion(style(none)) scheme(s1color) title("") ///
    xtitle("Number of authors") ///
    ytitle("Share of articles with given number of authors") ///
    xlabel(0.5 `" "' 1 `"1"' 2 `"2"' 3 `"3"' 4 `"4+"' 4.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .20 `"0.20"' .40 `"0.40"' .60 `"0.60"' .80 `"0.80"' 1 `"1"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., 1)) ///
    legend(row(1) size(medsmall) order(2 1) label(2 "regular article") label(1 "comment") region(lwidth(none)))

// save the figure in both formats
graph export "${output}/number_authors_asr.eps", replace
graph export "${output}/number_authors_asr.png", replace



//
// JAMA
//
// Distribution of the number of authors per article, comments vs. regular
// research articles. Prints a cross-tabulation and exports the figure as
// number_authors_jama.eps / .png.

// load data and restrict the sample
use "${data}/output/jama_pubmed_data_gender.dta", clear
drop if year==2020
// Comment & Response section started in July 2013: keep July 2013 onwards
// (this also removes the pre-2002 years, for which full author names from
// PubMed are not available)
drop if year<2013
drop if year==2013 & inlist(month, "January", "February", "March", "April", "May", "June")
keep if comment | research_article
drop if article_with_etal
// this JAMA editor appeared as first author of letters to the editor
// NOTE(review): only the matching author rows are dropped, so remaining
// co-authors of those articles are still counted below -- confirm this is intended
drop if strpos(full_name, "Fontanarosa")

// number of authors = number of rows sharing the same article_id
bysort article_id: egen number_authors = count(article_id)
replace number_authors = 4 if number_authors > 4 & !missing(number_authors)  // 4+ authors into single bin

// reduce to one row per article
collapse (firstnm) number_authors year comment research_article, by(article_id)

// cross-tabulation with column percentages
tabulate number_authors comment, column

// overlaid histograms: comments (filled grey bars) vs. regular articles (outlined bars)
// -fraction- makes the bar heights shares of articles within each group, as the
// y-axis title states; the default (density) equals the share only when the bin
// width is 1, and with -discrete- the width is taken from the observed values
twoway ///
    (histogram number_authors if comment==1, discrete fraction barwidth(0.5) ///
        lcolor(none) fcolor(gs11%80)) ///
    (histogram number_authors if comment==0, discrete fraction barwidth(0.5) ///
        lcolor(gs3) fcolor(none) lwidth(medthick)), ///
    plotregion(style(none)) scheme(s1color) title("") ///
    xtitle("Number of authors") ///
    ytitle("Share of articles with given number of authors") ///
    xlabel(0.5 `" "' 1 `"1"' 2 `"2"' 3 `"3"' 4 `"4+"' 4.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .20 `"0.20"' .40 `"0.40"' .60 `"0.60"' .80 `"0.80"' 1 `"1"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., 1)) ///
    legend(row(1) size(medsmall) order(2 1) label(2 "regular article") label(1 "comment") region(lwidth(none)))

// save the figure in both formats
graph export "${output}/number_authors_jama.eps", replace
graph export "${output}/number_authors_jama.png", replace



//
// Nature
//
// Distribution of the number of authors per article, comments vs. regular
// research articles. Prints a cross-tabulation and exports the figure as
// number_authors_nature.eps / .png.

// load data and restrict the sample
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// number of authors = number of rows sharing the same article_id
bysort article_id: egen number_authors = count(article_id)
replace number_authors = 4 if number_authors > 4 & !missing(number_authors)  // 4+ authors into single bin

// reduce to one row per article
collapse (firstnm) number_authors year comment research_article, by(article_id)

// cross-tabulation with column percentages
tabulate number_authors comment, column

// overlaid histograms: comments (filled grey bars) vs. regular articles (outlined bars)
// -fraction- makes the bar heights shares of articles within each group, as the
// y-axis title states; the default (density) equals the share only when the bin
// width is 1, and with -discrete- the width is taken from the observed values
twoway ///
    (histogram number_authors if comment==1, discrete fraction barwidth(0.5) ///
        lcolor(none) fcolor(gs11%80)) ///
    (histogram number_authors if comment==0, discrete fraction barwidth(0.5) ///
        lcolor(gs3) fcolor(none) lwidth(medthick)), ///
    plotregion(style(none)) scheme(s1color) title("") ///
    xtitle("Number of authors") ///
    ytitle("Share of articles with given number of authors") ///
    xlabel(0.5 `" "' 1 `"1"' 2 `"2"' 3 `"3"' 4 `"4+"' 4.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .20 `"0.20"' .40 `"0.40"' .60 `"0.60"' .80 `"0.80"' 1 `"1"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., 1)) ///
    legend(row(1) size(medsmall) order(2 1) label(2 "regular article") label(1 "comment") region(lwidth(none)))

// save the figure in both formats
graph export "${output}/number_authors_nature.eps", replace
graph export "${output}/number_authors_nature.png", replace



//
// PNAS
//
// Distribution of the number of authors per article, comments vs. regular
// research articles. Prints a cross-tabulation and exports the figure as
// number_authors_pnas.eps / .png.

// load data and restrict the sample (years 2008-2019 only)
use "${data}/output/pnas_data_gender.dta", clear
drop if year==2020 | year<2008
keep if comment | research_article

// number of authors = number of rows sharing the same article_id
bysort article_id: egen number_authors = count(article_id)
replace number_authors = 4 if number_authors > 4 & !missing(number_authors)  // 4+ authors into single bin

// reduce to one row per article
collapse (firstnm) number_authors year comment research_article, by(article_id)

// cross-tabulation with column percentages
tabulate number_authors comment, column

// overlaid histograms: comments (filled grey bars) vs. regular articles (outlined bars)
// -fraction- makes the bar heights shares of articles within each group, as the
// y-axis title states; the default (density) equals the share only when the bin
// width is 1, and with -discrete- the width is taken from the observed values
twoway ///
    (histogram number_authors if comment==1, discrete fraction barwidth(0.5) ///
        lcolor(none) fcolor(gs11%80)) ///
    (histogram number_authors if comment==0, discrete fraction barwidth(0.5) ///
        lcolor(gs3) fcolor(none) lwidth(medthick)), ///
    plotregion(style(none)) scheme(s1color) title("") ///
    xtitle("Number of authors") ///
    ytitle("Share of articles with given number of authors") ///
    xlabel(0.5 `" "' 1 `"1"' 2 `"2"' 3 `"3"' 4 `"4+"' 4.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .20 `"0.20"' .40 `"0.40"' .60 `"0.60"' .80 `"0.80"' 1 `"1"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., 1)) ///
    legend(row(1) size(medsmall) order(2 1) label(2 "regular article") label(1 "comment") region(lwidth(none)))

// save the figure in both formats
graph export "${output}/number_authors_pnas.eps", replace
graph export "${output}/number_authors_pnas.png", replace



//
// Science
//
// Distribution of the number of authors per article, comments vs. regular
// research articles. Prints a cross-tabulation and exports the figure as
// number_authors_science.eps / .png.

// load data and restrict the sample
use "${data}/output/science_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// number of authors = number of rows sharing the same article_id
bysort article_id: egen number_authors = count(article_id)
replace number_authors = 4 if number_authors > 4 & !missing(number_authors)  // 4+ authors into single bin

// reduce to one row per article
collapse (firstnm) number_authors year comment research_article, by(article_id)

// cross-tabulation with column percentages
tabulate number_authors comment, column

// overlaid histograms: comments (filled grey bars) vs. regular articles (outlined bars)
// -fraction- makes the bar heights shares of articles within each group, as the
// y-axis title states; the default (density) equals the share only when the bin
// width is 1, and with -discrete- the width is taken from the observed values
twoway ///
    (histogram number_authors if comment==1, discrete fraction barwidth(0.5) ///
        lcolor(none) fcolor(gs11%80)) ///
    (histogram number_authors if comment==0, discrete fraction barwidth(0.5) ///
        lcolor(gs3) fcolor(none) lwidth(medthick)), ///
    plotregion(style(none)) scheme(s1color) title("") ///
    xtitle("Number of authors") ///
    ytitle("Share of articles with given number of authors") ///
    xlabel(0.5 `" "' 1 `"1"' 2 `"2"' 3 `"3"' 4 `"4+"' 4.5 `" "', noticks labsize(medsmall)) ///
    ylabel(0 .20 `"0.20"' .40 `"0.40"' .60 `"0.60"' .80 `"0.80"' 1 `"1"', grid angle(horizontal) labsize(medsmall)) ///
    yscale(r(., 1)) ///
    legend(row(1) size(medsmall) order(2 1) label(2 "regular article") label(1 "comment") region(lwidth(none)))

// save the figure in both formats
graph export "${output}/number_authors_science.eps", replace
graph export "${output}/number_authors_science.png", replace





